在 S.O上有一個SQL問題Splits records based on range
在大家分享我的解法
有兩張表
Item Table
| Id | ItemId | FROM | To |
|----|--------|------|-----|
| 1 | 1 | 1 | 100 |
| 2 | 1 | 101 | 500 |
| 3 | 1 | 600 | 700 |
ItemRange Table
| Id | ItemId | FROM | To |
|----|--------|------|----|
| 1 | 1 | 50 | 60 |
| 2 | 1 | 70 | 80 |
期望輸出
| ItemId | FROM | TO |
|--------|------|-----|
| 1 | 1 | 49 |
| 1 | 61 | 69 |
| 1 | 81 | 500 |
| 1 | 600 | 700 |
提問者想要產生一個結果集,關於Item
表依照 ItemId
創立 [FROM]
到 [TO]
的值,但要排除在ItemRange
表連續範圍內的值。
例如:Item
表有一列 1 ~ 100
| ItemId | FROM | To |
| 1 | 1 | 100 |
但由於ItemRange
表有兩列 50 ~ 60
和70 ~ 80
| ItemId | FROM | To |
| 1 | 50 | 60 |
| 1 | 70 | 80 |
所以這部份期望輸出,Item
表要排除ItemRange
表連續範圍數字
如下
| ItemId | FROM | TO |
|--------|------|-----|
| 1 | 1 | 49 |
| 1 | 61 | 69 |
| 1 | 81 | xxx |
我看到連續範圍,就會聯想這個是一個(Islands and gaps problem)
我的想法是要先兩個full
連續範圍資料表,我先使用CTE遞迴
來實作
Item
ItemRange
之後再使用 except
對於 Item
排除 ItemRange
要排除的數字範圍
;WITH CTE AS (
SELECT ItemId,[FROM],[TO]
FROM Item
UNION ALL
SELECT ItemId,[FROM]+ 1,[TO]
FROM CTE
WHERE [FROM]+ 1 <= [TO]
), CTE2 AS(
SELECT ItemId,[FROM],[TO]
FROM ItemRange
UNION ALL
SELECT ItemId,[FROM]+ 1,[TO]
FROM CTE2
WHERE [FROM]+ 1 <= [TO]
),CTE3 AS(
SELECT ItemId,[FROM]
FROM CTE
except
SELECT ItemId,[FROM]
FROM CTE2
)
最後就是重頭戲,使用ROW_NUMBER + Window Function
做出標示連續範圍的群組
SELECT ItemId,
MIN([FROM]) 'FROM',
MAX([FROM]) 'TO'
FROM (
SELECT ItemId,[FROM],[FROM] - ROW_NUMBER() OVER(ORDER BY [FROM]) grp
FROM CTE3
) t1
GROUP BY grp,ItemId
option (maxrecursion 0)
目前提問者還有一個問題是他的範圍太大,導致執行效能不是很好 這個問題我目前還沒有特別的解法 歡迎有想法的大大可以提供建議
P.S. 我覺得此問題很清楚,但我不知道為什麼會被投Close
這題好難,想試試看能不能不用迴圈寫出來。
作法分為兩階段:
SELECT A.ItemId, A.[FROM],
CASE WHEN A.[TO] > B.[TO] THEN A.[TO] ELSE B.[TO] END AS [TO],
--重新編一個Id
A.Id+'-1' AS Id,
A.Id AS PId1,
CAST(B.Id AS NVARCHAR(MAX)) AS PId2
FROM CTE_MERGE AS A
找到可以合併的區間後,會加入一筆新的區間,並紀錄 PId1、PId2 代表被合併的兩個區間,之後 CLEAR CTE 中就可以利用 PId1、PId2 排除不要的資料。
CROSS APPLY (VALUES(1),(2)) AS C(G)
這邊需要把區間拆開,一直想不到用什麼方法可以將一筆資料拆成兩筆,大部分時間是卡在這裡,最後發現可以用 CROSS APPLY
+ VALUES
,在配合 CASE WHEN
就可以漂亮的把區間拆開。
CASE WHEN G=1 THEN A.[FROM] ELSE B.[TO]+1 END AS [FROM],
CASE WHEN G=1 THEN B.[FROM]-1 ELSE A.[TO] END AS [TO],
最後這邊是關鍵,本來我以為需要寫很多判斷,因為 Item、ItemRange 交集會產生很多種可能,但很神奇的是只用兩行就完成了,出乎意料。
情況如下:
第一種:
A:10-20 [ ]
B:18-25 [ ]
Range1: 10-17
Range2: 26-20
第二種:
A:10-20 [ ]
B:05-12 [ ]
Range1: 10-4
Range2: 13-20
第三種:
A:10-20 [ ]
B:12-18 [ ]
Range1: 10-11
Range2: 19-20
第四種:
A:10-20 [ ]
B:05-25 [ ]
Range1: 10-04
Range2: 16-20
第五種:
A:10-20 [ ]
B:10-20 [ ]
Range1: 10-09
Range2: 21-20
最後只要把負區間去掉,就是期望的結果。
ItemId FROM TO
--------|-----|-------
1 1 49
1 61 69
1 81 500
1 600 700
總共用了四個 CTE 完成:
DECLARE @Item TABLE
(
[Id] INT,
[ItemId] INT,
[FROM] INT,
[TO] INT
)
DECLARE @ItemRange TABLE
(
[Id] INT,
[ItemId] INT,
[FROM] INT,
[TO] INT
)
INSERT INTO @Item VALUES
(1,1,1,100),
(2,1,101,500),
(3,1,600,700)
INSERT INTO @ItemRange VALUES
(1,1,50,60),
(2,1,70,80)
--合併Item重疊部分
--排除被完全包含的區間 A[ B[] ]
--並合併下面這種類型的區間
-- A[ ]
-- B [ ]
;WITH CTE_MERGE AS
(
SELECT A.ItemId, A.[FROM], A.[TO],
CAST(A.Id AS NVARCHAR(MAX)) AS Id,
CAST(NULL AS NVARCHAR(MAX)) AS PId1,
CAST(NULL AS NVARCHAR(MAX)) AS PId2
FROM @Item AS A
LEFT JOIN @Item AS B
ON B.ItemId=A.ItemId AND
--排除A被包在B的資料,這種資料會造成下方無窮迴圈
B.Id<>A.Id AND
B.[FROM] <= A.[FROM] AND B.[TO] >= A.[TO]
WHERE B.Id IS NULL
UNION ALL
SELECT A.ItemId, A.[FROM],
CASE WHEN A.[TO] > B.[TO] THEN A.[TO] ELSE B.[TO] END AS [TO],
--重新編一個Id
A.Id+'-1' AS Id,
A.Id AS PId1,
CAST(B.Id AS NVARCHAR(MAX)) AS PId2
FROM CTE_MERGE AS A
CROSS APPLY (
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @Item AS B
WHERE B.ItemId=A.ItemId AND
--找到可以合併的區間
CAST(B.Id AS NVARCHAR(MAX))<>A.Id AND
A.[TO]+1 >= B.[FROM] AND A.[TO]+1 <= B.[TO]
) AS X
WHERE X.R=1
) AS B
)
--SELECT * FROM CTE_MERGE
--排除遞迴中殘留的部分
,CTE_MERGE_CLEAR AS
(
SELECT A.Id, A.ItemId, A.[FROM], A.[TO]
FROM CTE_MERGE AS A
LEFT JOIN CTE_MERGE AS B ON A.Id=B.PId1 OR A.Id=B.PId2
--沒有父Id的資料才是最後結果
WHERE B.Id IS NULL
)
--SELECT * FROM CTE_MERGE_CLEAR
--排除ItemRange部分
--判斷重疊有這五種可能
-- A [ ] [ ] [ ] [ ] [ ]
-- B [ ] [ ] [ ] [ ] [ ]
,CTE AS
(
SELECT ItemId, [FROM], [TO], Id,
CAST(NULL AS NVARCHAR(MAX)) AS PId
FROM CTE_MERGE_CLEAR
UNION ALL
SELECT *
FROM (
SELECT A.ItemId,
CASE WHEN G=1 THEN A.[FROM] ELSE B.[TO]+1 END AS [FROM],
CASE WHEN G=1 THEN B.[FROM]-1 ELSE A.[TO] END AS [TO],
--重新編一個Id
CASE WHEN G=1 THEN A.Id+'-1' ELSE A.Id+'-2' END AS Id,
A.Id AS PId
FROM CTE AS A
CROSS APPLY (
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @ItemRange AS B
WHERE B.ItemId=A.ItemId AND
--找到Item和ItemRange重疊的部分
((A.[FROM] >= B.[FROM] AND A.[FROM] <= B.[TO]) OR
(A.[TO] >= B.[FROM] AND A.[TO] <= B.[TO]) OR
(A.[FROM] <= B.[FROM] AND A.[TO] >= B.[TO]))
) AS X
--取一筆就好,一次迴圈拆一次區間就好
WHERE X.R=1
) AS B
--將一筆資料拆成兩筆
CROSS APPLY (VALUES(1),(2)) AS C(G)
) AS X
--負區間就不要了
WHERE X.[TO]-X.[FROM] > 0
)
--SELECT * FROM CTE
--排除遞迴中殘留的部分
,CTE_CLEAR AS
(
SELECT A.ItemId, A.[FROM], A.[TO]
FROM CTE AS A
LEFT JOIN CTE AS B ON A.Id=B.PId
--沒有父Id的資料才是最後結果
WHERE B.Id IS NULL
)
SELECT *
FROM CTE_CLEAR
ORDER BY ItemId, [FROM]
CTE 效能不是很好,用 TEMP TABLE 優化:
DECLARE @Item TABLE
(
[Id] INT,
[ItemId] INT,
[FROM] INT,
[TO] INT
)
DECLARE @ItemRange TABLE
(
[Id] INT,
[ItemId] INT,
[FROM] INT,
[TO] INT
)
INSERT INTO @Item VALUES
(1,1,1,100),
(2,1,101,500),
(3,1,600,700)
INSERT INTO @ItemRange VALUES
(1,1,50,60),
(2,1,70,80)
--暫存表優化CTE
DECLARE @TEMP_MERGE TABLE
(
ItemId INT,
[FROM] INT,
[TO] INT,
Id NVARCHAR(MAX),
PId1 NVARCHAR(MAX),
PId2 NVARCHAR(MAX)
)
DECLARE @TEMP_MERGE_CLEAR TABLE
(
Id NVARCHAR(MAX),
ItemId INT,
[FROM] INT,
[TO] INT
)
DECLARE @TEMP TABLE
(
ItemId INT,
[FROM] INT,
[TO] INT,
Id NVARCHAR(MAX),
PId NVARCHAR(MAX)
)
--合併Item重疊部分
;WITH CTE_MERGE AS
(
SELECT A.ItemId, A.[FROM], A.[TO],
CAST(A.Id AS NVARCHAR(MAX)) AS Id,
CAST(NULL AS NVARCHAR(MAX)) AS PId1,
CAST(NULL AS NVARCHAR(MAX)) AS PId2
FROM @Item AS A
LEFT JOIN @Item AS B
ON B.ItemId=A.ItemId AND
B.Id<>A.Id AND
B.[FROM] <= A.[FROM] AND B.[TO] >= A.[TO]
WHERE B.Id IS NULL
UNION ALL
SELECT A.ItemId, A.[FROM],
CASE WHEN A.[TO] > B.[TO] THEN A.[TO] ELSE B.[TO] END AS [TO],
A.Id+'-1' AS Id,
A.Id AS PId1,
CAST(B.Id AS NVARCHAR(MAX)) AS PId2
FROM CTE_MERGE AS A
CROSS APPLY (
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @Item AS B
WHERE B.ItemId=A.ItemId AND
CAST(B.Id AS NVARCHAR(MAX))<>A.Id AND
A.[TO]+1 >= B.[FROM] AND A.[TO]+1 <= B.[TO]
) AS X
WHERE X.R=1
) AS B
)
INSERT INTO @TEMP_MERGE
SELECT * FROM CTE_MERGE
--SELECT * FROM @TEMP_MERGE
--排除遞迴中殘留的部分
;WITH CTE_MERGE_CLEAR AS
(
SELECT A.Id, A.ItemId, A.[FROM], A.[TO]
FROM @TEMP_MERGE AS A
LEFT JOIN @TEMP_MERGE AS B ON A.Id=B.PId1 OR A.Id=B.PId2
WHERE B.Id IS NULL
)
INSERT INTO @TEMP_MERGE_CLEAR
SELECT * FROM CTE_MERGE_CLEAR
--SELECT * FROM @TEMP_MERGE_CLEAR
--排除ItemRange部分
;WITH CTE AS
(
SELECT ItemId, [FROM], [TO], Id,
CAST(NULL AS NVARCHAR(MAX)) AS PId
FROM @TEMP_MERGE_CLEAR
UNION ALL
SELECT *
FROM (
SELECT A.ItemId,
CASE WHEN G=1 THEN A.[FROM] ELSE B.[TO]+1 END AS [FROM],
CASE WHEN G=1 THEN B.[FROM]-1 ELSE A.[TO] END AS [TO],
CASE WHEN G=1 THEN A.Id+'-1' ELSE A.Id+'-2' END AS Id,
A.Id AS PId
FROM CTE AS A
CROSS APPLY (
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY B.Id) AS R FROM @ItemRange AS B
WHERE B.ItemId=A.ItemId AND
((A.[FROM] >= B.[FROM] AND A.[FROM] <= B.[TO]) OR
(A.[TO] >= B.[FROM] AND A.[TO] <= B.[TO]) OR
(A.[FROM] <= B.[FROM] AND A.[TO] >= B.[TO]))
) AS X
WHERE X.R=1
) AS B
CROSS APPLY (VALUES(1),(2)) AS C(G)
) AS X
WHERE X.[TO]-X.[FROM] > 0
)
INSERT INTO @TEMP
SELECT * FROM CTE
--SELECT * FROM @TEMP
--排除遞迴中殘留的部分
;WITH CTE_CLEAR AS
(
SELECT A.ItemId, A.[FROM], A.[TO]
FROM @TEMP AS A
LEFT JOIN @TEMP AS B ON A.Id=B.PId
WHERE B.Id IS NULL
)
SELECT *
FROM CTE_CLEAR
ORDER BY ItemId, [FROM]